{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6bdb68a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from tsfresh.feature_extraction import extract_features\n",
    "from tsfresh.feature_extraction import settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "77e5e5e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load data\n",
    "\n",
    "X = pd.read_csv(\"occupancy.csv\", parse_dates=[\"date\"])\n",
    "\n",
    "y = pd.read_csv(\"occupancy_target.csv\", index_col=\"id\")[\"occupancy\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "998d9ee8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ItemsView({'sum_values': None, 'median': None, 'mean': None, 'length': None, 'standard_deviation': None, 'variance': None, 'root_mean_square': None, 'maximum': None, 'absolute_maximum': None, 'minimum': None})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select minimal number of features to create\n",
    "\n",
    "minimal_feat = settings.MinimalFCParameters()\n",
    "\n",
    "minimal_feat.items()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6c1921d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'sum_values': None, 'median': None, 'mean': None, 'length': None, 'standard_deviation': None, 'variance': None, 'root_mean_square': None, 'maximum': None, 'absolute_maximum': None, 'minimum': None}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the settings mapping itself (feature name -> parameters).\n",
    "minimal_feat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "055cfb13",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.29it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(135, 10)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create minimal number of features\n",
    "\n",
    "features = extract_features(\n",
    "    X[[\"id\", \"light\"]],\n",
    "    column_id=\"id\",\n",
    "    default_fc_parameters=minimal_feat,\n",
    ")\n",
    "\n",
    "features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a5ed5311",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>light__sum_values</th>\n",
       "      <th>light__median</th>\n",
       "      <th>light__mean</th>\n",
       "      <th>light__length</th>\n",
       "      <th>light__standard_deviation</th>\n",
       "      <th>light__variance</th>\n",
       "      <th>light__root_mean_square</th>\n",
       "      <th>light__maximum</th>\n",
       "      <th>light__absolute_maximum</th>\n",
       "      <th>light__minimum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2932.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>48.875</td>\n",
       "      <td>60.0</td>\n",
       "      <td>134.485582</td>\n",
       "      <td>18086.371875</td>\n",
       "      <td>143.091361</td>\n",
       "      <td>419.0</td>\n",
       "      <td>419.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   light__sum_values  light__median  light__mean  light__length  \\\n",
       "1             2932.5            0.0       48.875           60.0   \n",
       "2                0.0            0.0        0.000           60.0   \n",
       "3                0.0            0.0        0.000           60.0   \n",
       "4                0.0            0.0        0.000           60.0   \n",
       "5                0.0            0.0        0.000           60.0   \n",
       "\n",
       "   light__standard_deviation  light__variance  light__root_mean_square  \\\n",
       "1                 134.485582     18086.371875               143.091361   \n",
       "2                   0.000000         0.000000                 0.000000   \n",
       "3                   0.000000         0.000000                 0.000000   \n",
       "4                   0.000000         0.000000                 0.000000   \n",
       "5                   0.000000         0.000000                 0.000000   \n",
       "\n",
       "   light__maximum  light__absolute_maximum  light__minimum  \n",
       "1           419.0                    419.0             0.0  \n",
       "2             0.0                      0.0             0.0  \n",
       "3             0.0                      0.0             0.0  \n",
       "4             0.0                      0.0             0.0  \n",
       "5             0.0                      0.0             0.0  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7396dd86",
   "metadata": {},
   "outputs": [],
   "source": [
    "# split data\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    features,\n",
    "    y,\n",
    "    test_size=0.1,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d38aeb9b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      0.91      0.95        11\n",
      "           1       0.75      1.00      0.86         3\n",
      "\n",
      "    accuracy                           0.93        14\n",
      "   macro avg       0.88      0.95      0.90        14\n",
      "weighted avg       0.95      0.93      0.93        14\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  n_iter_i = _check_optimize_result(\n"
     ]
    }
   ],
   "source": [
    "# train and evaluate classifier\n",
    "\n",
    "cls = LogisticRegression(random_state=10, C=0.01)\n",
    "cls.fit(X_train, y_train)\n",
    "\n",
    "print(classification_report(y_test, cls.predict(X_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "08dae809",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "74"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# features that are efficient to compute\n",
    "\n",
    "efficient_feat = settings.EfficientFCParameters()\n",
    "\n",
    "len(efficient_feat.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5bba6879",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "76"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# comprehensive list of features\n",
    "\n",
    "comprehensive_feat = settings.ComprehensiveFCParameters()\n",
    "\n",
    "len(comprehensive_feat.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "494d418a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# different features for different time series\n",
    "\n",
    "light_feat = {\n",
    "    \"sum_values\": None,\n",
    "    \"median\": None,\n",
    "    \"standard_deviation\": None,\n",
    "    \"quantile\": [{\"q\": 0.2}, {\"q\": 0.7}],\n",
    "}\n",
    "\n",
    "co2_feat = {\"root_mean_square\": None, \"number_peaks\": [{\"n\": 1}, {\"n\": 2}]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d5403555",
   "metadata": {},
   "outputs": [],
   "source": [
    "kind_to_fc_parameters = {\n",
    "    \"light\": light_feat,\n",
    "    \"co2\": co2_feat,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f91d6eb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 10/10 [00:04<00:00,  2.17it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(135, 8)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create different features for different time series\n",
    "\n",
    "features = extract_features(\n",
    "    X[[\"id\", \"light\", \"co2\"]],\n",
    "    column_id=\"id\",\n",
    "    kind_to_fc_parameters=kind_to_fc_parameters,\n",
    ")\n",
    "\n",
    "features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7af8a505",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['light__sum_values', 'light__median', 'light__standard_deviation',\n",
       "       'light__quantile__q_0.2', 'light__quantile__q_0.7',\n",
       "       'co2__root_mean_square', 'co2__number_peaks__n_1',\n",
       "       'co2__number_peaks__n_2'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Names of the extracted feature columns.\n",
    "features.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a5ed6b76",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>light__sum_values</th>\n",
       "      <th>light__median</th>\n",
       "      <th>light__standard_deviation</th>\n",
       "      <th>light__quantile__q_0.2</th>\n",
       "      <th>light__quantile__q_0.7</th>\n",
       "      <th>co2__root_mean_square</th>\n",
       "      <th>co2__number_peaks__n_1</th>\n",
       "      <th>co2__number_peaks__n_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2932.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>134.485582</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>656.304517</td>\n",
       "      <td>16.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>573.234731</td>\n",
       "      <td>12.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>521.507749</td>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>493.834529</td>\n",
       "      <td>12.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>476.999701</td>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   light__sum_values  light__median  light__standard_deviation  \\\n",
       "1             2932.5            0.0                 134.485582   \n",
       "2                0.0            0.0                   0.000000   \n",
       "3                0.0            0.0                   0.000000   \n",
       "4                0.0            0.0                   0.000000   \n",
       "5                0.0            0.0                   0.000000   \n",
       "\n",
       "   light__quantile__q_0.2  light__quantile__q_0.7  co2__root_mean_square  \\\n",
       "1                     0.0                     0.0             656.304517   \n",
       "2                     0.0                     0.0             573.234731   \n",
       "3                     0.0                     0.0             521.507749   \n",
       "4                     0.0                     0.0             493.834529   \n",
       "5                     0.0                     0.0             476.999701   \n",
       "\n",
       "   co2__number_peaks__n_1  co2__number_peaks__n_2  \n",
       "1                    16.0                     6.0  \n",
       "2                    12.0                    10.0  \n",
       "3                     8.0                     5.0  \n",
       "4                    12.0                     9.0  \n",
       "5                    15.0                     8.0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cae5efb1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
